library(lavaan)
library(dplyr)
library(reticulate)
library(marginaleffects)
library(modelsummary)
library(ggplot2)
library(egg)
library(lme4)
library(semPlot)
library(tinytable)
library(kableExtra)
library(reshape2)
reticulate::py_run_string("import pymc as pm")
options(rstudio.python.installationPath = "/Users/nathanielforde/mambaforge/envs")
options("modelsummary_factory_default" = "tinytable")
options(repr.plot.width=15, repr.plot.height=8)
knitr::knit_engines$set(python = reticulate::eng_python)
options(scipen=999)
set.seed(130)
Warning
This is a work in progress. We intend to elaborate on the choices involved in analysing and understanding the data elicited through intentional psychometric surveys.
Measurement and Measurement Constructs
# Load the survey data and perturb the third life-satisfaction parcel for
# respondents in the 'west' region with Gaussian noise (mean 0.5, sd 1),
# then derive the sum and mean composite outcome scores.
df <- read.csv('sem_data.csv')
west_rows <- df$region == 'west'
noise <- rnorm(sum(west_rows), 0.5, 1)
df$ls_p3[west_rows] <- df$ls_p3[west_rows] + noise
df$ls_sum <- df$ls_p1 + df$ls_p2 + df$ls_p3
df$ls_mean <- rowMeans(df[c('ls_p1', 'ls_p2', 'ls_p3')])
head(df) |> kable()| ID | region | gender | age | se_acad_p1 | se_acad_p2 | se_acad_p3 | se_social_p1 | se_social_p2 | se_social_p3 | sup_friends_p1 | sup_friends_p2 | sup_friends_p3 | sup_parents_p1 | sup_parents_p2 | sup_parents_p3 | ls_p1 | ls_p2 | ls_p3 | ls_sum | ls_mean |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | west | female | 13 | 4.857143 | 5.571429 | 4.500000 | 5.80 | 5.500000 | 5.40 | 6.5 | 6.5 | 7.0 | 7.0 | 7.0 | 6.0 | 5.333333 | 6.75 | 7.647112 | 19.73044 | 6.576815 |
| 2 | west | male | 14 | 4.571429 | 4.285714 | 4.666667 | 5.00 | 5.500000 | 4.80 | 4.5 | 4.5 | 5.5 | 5.0 | 6.0 | 4.5 | 4.333333 | 5.00 | 4.469623 | 13.80296 | 4.600985 |
| 10 | west | female | 14 | 4.142857 | 6.142857 | 5.333333 | 5.20 | 4.666667 | 6.00 | 4.0 | 4.5 | 3.5 | 7.0 | 7.0 | 6.5 | 6.333333 | 5.50 | 4.710020 | 16.54335 | 5.514451 |
| 11 | west | female | 14 | 5.000000 | 5.428571 | 4.833333 | 6.40 | 5.833333 | 6.40 | 7.0 | 7.0 | 7.0 | 7.0 | 7.0 | 7.0 | 4.333333 | 6.50 | 5.636198 | 16.46953 | 5.489844 |
| 12 | west | female | 14 | 5.166667 | 5.600000 | 4.800000 | 5.25 | 5.400000 | 5.25 | 7.0 | 7.0 | 7.0 | 6.5 | 6.5 | 7.0 | 5.666667 | 6.00 | 5.266592 | 16.93326 | 5.644419 |
| 14 | west | male | 14 | 4.857143 | 4.857143 | 4.166667 | 5.20 | 5.000000 | 4.20 | 5.5 | 6.5 | 7.0 | 6.5 | 6.5 | 6.5 | 5.000000 | 5.50 | 5.913709 | 16.41371 | 5.471236 |
#| code-fold: true
#| code-summary: "Show the code"
# Summary table of all variables; the chained style_tt() calls colour-code
# the table rows by variable group (see the rendered table below):
# rows 15:17 = ls parcels, 18:19 = derived composites, 2:14 = age + indicators.
datasummary_skim(df)|>
style_tt(
i = 15:17,
j = 1:1,
background = "#20AACC",
color = "white",
italic = TRUE) |>
style_tt(
i = 18:19,
j = 1:1,
background = "#2888A0",
color = "white",
italic = TRUE) |>
style_tt(
i = 2:14,
j = 1:1,
background = "#17C2AD",
color = "white",
italic = TRUE)| Unique | Missing Pct. | Mean | SD | Min | Median | Max | Histogram | |
|---|---|---|---|---|---|---|---|---|
| ID | 283 | 0 | 187.9 | 106.3 | 1.0 | 201.0 | 367.0 | |
| age | 5 | 0 | 14.7 | 0.8 | 13.0 | 15.0 | 17.0 | |
| se_acad_p1 | 32 | 0 | 5.2 | 0.8 | 3.1 | 5.1 | 7.0 | |
| se_acad_p2 | 36 | 0 | 5.3 | 0.7 | 3.1 | 5.4 | 7.0 | |
| se_acad_p3 | 29 | 0 | 5.2 | 0.8 | 2.8 | 5.2 | 7.0 | |
| se_social_p1 | 24 | 0 | 5.3 | 0.8 | 1.8 | 5.4 | 7.0 | |
| se_social_p2 | 27 | 0 | 5.5 | 0.7 | 2.7 | 5.5 | 7.0 | |
| se_social_p3 | 31 | 0 | 5.4 | 0.8 | 3.0 | 5.5 | 7.0 | |
| sup_friends_p1 | 13 | 0 | 5.8 | 1.1 | 1.0 | 6.0 | 7.0 | |
| sup_friends_p2 | 10 | 0 | 6.0 | 0.9 | 2.5 | 6.0 | 7.0 | |
| sup_friends_p3 | 13 | 0 | 6.0 | 1.1 | 1.0 | 6.0 | 7.0 | |
| sup_parents_p1 | 11 | 0 | 6.0 | 1.1 | 1.0 | 6.0 | 7.0 | |
| sup_parents_p2 | 11 | 0 | 5.9 | 1.1 | 2.0 | 6.0 | 7.0 | |
| sup_parents_p3 | 13 | 0 | 5.7 | 1.1 | 1.0 | 6.0 | 7.0 | |
| ls_p1 | 15 | 0 | 5.2 | 0.9 | 2.0 | 5.3 | 7.0 | |
| ls_p2 | 21 | 0 | 5.8 | 0.7 | 2.5 | 5.8 | 7.0 | |
| ls_p3 | 161 | 0 | 5.5 | 1.1 | 1.7 | 5.6 | 9.6 | |
| ls_sum | 218 | 0 | 16.5 | 2.2 | 8.2 | 16.7 | 21.0 | |
| ls_mean | 217 | 0 | 5.5 | 0.7 | 2.7 | 5.6 | 7.0 | |
| N | % | |||||||
| region | east | 142 | 50.2 | |||||
| west | 141 | 49.8 | ||||||
| gender | female | 132 | 46.6 | |||||
| male | 151 | 53.4 |
# Observed indicator (parcel) columns: three parcels per construct for
# academic self-efficacy, social self-efficacy, friend support, parental
# support, and life satisfaction.
drivers <- c(
  'se_acad_p1', 'se_acad_p2', 'se_acad_p3',
  'se_social_p1', 'se_social_p2', 'se_social_p3',
  'sup_friends_p1', 'sup_friends_p2', 'sup_friends_p3',
  'sup_parents_p1', 'sup_parents_p2', 'sup_parents_p3',
  'ls_p1', 'ls_p2', 'ls_p3'
)
#' Render a square matrix (covariance/correlation) as an annotated heatmap.
#'
#' @param df A matrix-like object (matrix or data.frame) of numeric values.
#' @param title Plot title.
#' @param subtitle Plot subtitle.
#' @return A ggplot object: tiles on a red-white-blue diverging scale with
#'   the rounded values printed in each cell.
plot_heatmap <- function(df, title = "Sample Covariances", subtitle = "Observed Measures") {
  long <- melt(as.matrix(df))
  colnames(long) <- c("x", "y", "value")
  long$value <- round(long$value, 2)
  ggplot(long, aes(x = x, y = y, fill = value)) +
    geom_tile() +
    geom_text(aes(label = value), color = "black", size = 4) +
    scale_fill_gradient2(
      low = 'firebrick2',
      mid = 'white',
      high = 'dodgerblue4'
    ) +
    theme(axis.text.x = element_text(angle = 45)) +
    ggtitle(title, subtitle)
}
# Heatmaps of the sample covariance and correlation matrices of the
# observed indicators, stacked vertically for comparison.
g1 = plot_heatmap(cov(df[, drivers]))
g2 = plot_heatmap(cor(df[, drivers]), "Sample Correlations")
plot <- ggarrange(g1,g2, ncol=1, nrow=2);Fit Initial Regression Models
# Regression formulas predicting the life-satisfaction composites from
# successively larger subsets of the indicator parcels:
# *_1st uses only the first parcel of each construct, *_12 the first two,
# and the unsuffixed versions use all three parcels.
formula_sum_1st = " ls_sum ~ se_acad_p1 + se_social_p1 + sup_friends_p1 + sup_parents_p1"
formula_mean_1st = " ls_mean ~ se_acad_p1 + se_social_p1 + sup_friends_p1 + sup_parents_p1"
formula_sum_12 = " ls_sum ~ se_acad_p1 + se_acad_p2 + se_social_p1 + se_social_p2 +
sup_friends_p1 + sup_friends_p2 + sup_parents_p1 + sup_parents_p2"
formula_mean_12 = " ls_mean ~ se_acad_p1 + se_acad_p2 + se_social_p1 + se_social_p2 +
sup_friends_p1 + sup_friends_p2 + sup_parents_p1 + sup_parents_p2"
formula_sum = " ls_sum ~ se_acad_p1 + se_acad_p2 + se_acad_p3 + se_social_p1 + se_social_p2 + se_social_p3 + sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + sup_parents_p1 + sup_parents_p2 + sup_parents_p3"
formula_mean = " ls_mean ~ se_acad_p1 + se_acad_p2 + se_acad_p3 + se_social_p1 + se_social_p2 + se_social_p3 + sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + sup_parents_p1 + sup_parents_p2 + sup_parents_p3"
# Fit each formula with OLS against both the sum- and mean-score outcomes.
mod_sum = lm(formula_sum, df)
mod_sum_1st = lm(formula_sum_1st, df)
mod_sum_12 = lm(formula_sum_12, df)
mod_mean = lm(formula_mean, df)
mod_mean_1st = lm(formula_mean_1st, df)
mod_mean_12 = lm(formula_mean_12, df)
#' Rescale a numeric vector to the [0, 1] range (min-max normalisation).
#'
#' @param x A numeric vector.
#' @param na.rm Ignore NA values when computing the range? Defaults to
#'   FALSE, preserving the original behaviour (any NA makes the whole
#'   result NA).
#' @return A numeric vector the same length as `x`.
#'   NOTE: a constant vector (zero range) yields NaN — callers should
#'   guard against that case if it can occur.
min_max_norm <- function(x, na.rm = FALSE) {
  rng <- range(x, na.rm = na.rm)
  (x - rng[1]) / (rng[2] - rng[1])
}
# Min-max normalise the indicator columns (columns 5:19 — assumed to be
# the fifteen parcel columns; TODO confirm against the CSV layout), then
# refit the full models on the normalised predictors. The outcomes are
# carried over unscaled.
df_norm <- as.data.frame(lapply(df[5:19], min_max_norm))
df_norm$ls_sum <- df$ls_sum
df_norm$ls_mean <- df$ls_mean
mod_sum_norm <- lm(formula_sum, data = df_norm)
mod_mean_norm <- lm(formula_mean, data = df_norm)
# Nested model list grouped by outcome, for a two-panel comparison table.
models = list(
"Outcome: sum_score" = list("model_sum_1st_factors" = mod_sum_1st,
"model_sum_1st_2nd_factors" = mod_sum_12,
"model_sum_score" = mod_sum,
"model_sum_score_norm" = mod_sum_norm
),
"Outcome: mean_score" = list(
"model_mean_1st_factors" = mod_mean_1st,
"model_mean_1st_2nd_factors" = mod_mean_12,
"model_mean_score"= mod_mean,
"model_mean_score_norm" = mod_mean_norm
)
)#| code-fold: true
#| code-summary: "Show the code"
# Side-by-side regression table; shape = "cbind" places the two outcome
# groups next to each other, and style_tt() shades the coefficient rows.
modelsummary(models, stars=TRUE, shape ="cbind") |>
style_tt(
i = 2:25,
j = 1:1,
background = "#17C2AD",
color = "white",
italic = TRUE)| Outcome: sum_score | Outcome: mean_score | |||||||
|---|---|---|---|---|---|---|---|---|
| model_sum_1st_factors | model_sum_1st_2nd_factors | model_sum_score | model_sum_score_norm | model_mean_1st_factors | model_mean_1st_2nd_factors | model_mean_score | model_mean_score_norm | |
| + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 | ||||||||
| (Intercept) | 6.335*** | 4.038*** | 3.709*** | 9.081*** | 2.112*** | 1.346*** | 1.236*** | 3.027*** |
| (0.979) | (1.080) | (1.071) | (0.739) | (0.326) | (0.360) | (0.357) | (0.246) | |
| se_acad_p1 | 0.231 | -0.023 | -0.068 | -0.264 | 0.077 | -0.008 | -0.023 | -0.088 |
| (0.158) | (0.197) | (0.216) | (0.833) | (0.053) | (0.066) | (0.072) | (0.278) | |
| se_social_p1 | 1.039*** | 0.515* | 0.401+ | 2.085+ | 0.346*** | 0.172* | 0.134+ | 0.695+ |
| (0.175) | (0.224) | (0.224) | (1.167) | (0.058) | (0.075) | (0.075) | (0.389) | |
| sup_friends_p1 | 0.069 | -0.263+ | -0.273 | -1.636 | 0.023 | -0.088+ | -0.091 | -0.545 |
| (0.095) | (0.145) | (0.169) | (1.011) | (0.032) | (0.048) | (0.056) | (0.337) | |
| sup_parents_p1 | 0.518*** | 0.196 | 0.050 | 0.299 | 0.173*** | 0.065 | 0.017 | 0.100 |
| (0.107) | (0.155) | (0.161) | (0.964) | (0.036) | (0.052) | (0.054) | (0.321) | |
| se_acad_p2 | 0.415+ | 0.382+ | 1.475+ | 0.138+ | 0.127+ | 0.492+ | ||
| (0.216) | (0.227) | (0.875) | (0.072) | (0.076) | (0.292) | |||
| se_social_p2 | 0.662** | 0.483+ | 2.093+ | 0.221** | 0.161+ | 0.698+ | ||
| (0.233) | (0.246) | (1.065) | (0.078) | (0.082) | (0.355) | |||
| sup_friends_p2 | 0.358* | 0.366* | 1.647* | 0.119* | 0.122* | 0.549* | ||
| (0.172) | (0.180) | (0.808) | (0.057) | (0.060) | (0.269) | |||
| sup_parents_p2 | 0.375* | 0.166 | 0.829 | 0.125* | 0.055 | 0.276 | ||
| (0.151) | (0.162) | (0.808) | (0.050) | (0.054) | (0.269) | |||
| se_acad_p3 | -0.072 | -0.302 | -0.024 | -0.101 | ||||
| (0.196) | (0.815) | (0.065) | (0.272) | |||||
| se_social_p3 | 0.350+ | 1.400+ | 0.117+ | 0.467+ | ||||
| (0.180) | (0.721) | (0.060) | (0.240) | |||||
| sup_friends_p3 | 0.056 | 0.334 | 0.019 | 0.111 | ||||
| (0.146) | (0.878) | (0.049) | (0.293) | |||||
| sup_parents_p3 | 0.452** | 2.710** | 0.151** | 0.903** | ||||
| (0.141) | (0.847) | (0.047) | (0.282) | |||||
| Num.Obs. | 283 | 283 | 283 | 283 | 283 | 283 | 283 | 283 |
| R2 | 0.332 | 0.389 | 0.420 | 0.420 | 0.332 | 0.389 | 0.420 | 0.420 |
| R2 Adj. | 0.323 | 0.371 | 0.394 | 0.394 | 0.323 | 0.371 | 0.394 | 0.394 |
| AIC | 1134.2 | 1117.0 | 1110.3 | 1110.3 | 512.4 | 495.2 | 488.5 | 488.5 |
| BIC | 1156.1 | 1153.5 | 1161.3 | 1161.3 | 534.2 | 531.7 | 539.5 | 539.5 |
| Log.Lik. | -561.090 | -548.507 | -541.139 | -541.139 | -250.183 | -237.600 | -230.232 | -230.232 |
| RMSE | 1.76 | 1.68 | 1.64 | 1.64 | 0.59 | 0.56 | 0.55 | 0.55 |
# Flat list of all eight fitted models for a single coefficient plot.
models = list(
"model_sum_1st_factors" = mod_sum_1st,
"model_sum_1st_2nd_factors" = mod_sum_12,
"model_sum_score" = mod_sum,
"model_sum_score_norm" = mod_sum_norm,
"model_mean_1st_factors" = mod_mean_1st,
"model_mean_1st_2nd_factors" = mod_mean_12,
"model_mean_score"= mod_mean,
"model_mean_score_norm" = mod_mean_norm
)
# Dot-and-whisker plot of the coefficient estimates across all models,
# with a dotted reference line at zero.
modelplot(models, coef_omit = 'Intercept') + geom_vline(xintercept = 0, linetype="dotted",
color = "black") + ggtitle("Comparing Model Parameter Estimates", "Across Covariates")Significant Coefficients
# Coefficient plots for the full sum/mean models, colouring estimates
# significant at p < 0.001 differently from the rest.
g1 = modelplot(mod_sum, coef_omit = 'Intercept') + aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
scale_color_manual(values = c("grey", "black"), guide = "none")
g2 = modelplot(mod_mean, coef_omit = 'Intercept') + aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
scale_color_manual(values = c("grey", "black"))
plot <- ggarrange(g1,g2, ncol=2, nrow=1);Aggregate Driver Scores
# Parcel the drivers: average each construct's three items into a single
# mean score, then regress both outcomes on the four parcel means.
df$se_acad_mean <- rowMeans(df[c('se_acad_p1', 'se_acad_p2', 'se_acad_p3')])
df$se_social_mean <- rowMeans(df[c('se_social_p1', 'se_social_p2', 'se_social_p3')])
df$sup_friends_mean <- rowMeans(df[c('sup_friends_p1', 'sup_friends_p2', 'sup_friends_p3')])
df$sup_parents_mean <- rowMeans(df[c('sup_parents_p1', 'sup_parents_p2', 'sup_parents_p3')])
formula_parcel_mean = "ls_mean ~ se_acad_mean + se_social_mean + sup_friends_mean + sup_parents_mean"
formula_parcel_sum = "ls_sum ~ se_acad_mean + se_social_mean + sup_friends_mean + sup_parents_mean"
mod_sum_parcel = lm(formula_parcel_sum, df)
mod_mean_parcel = lm(formula_parcel_mean, df)
models_parcel = list("model_sum_score" = mod_sum_parcel,
"model_mean_score"= mod_mean_parcel
)
# Render the parcel-model comparison table.
modelsummary(models_parcel, stars=TRUE)| model_sum_score | model_mean_score |
|---|---|---|
| + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 | ||
| (Intercept) | 4.378*** | 1.459*** |
| (1.036) | (0.345) | |
| se_acad_mean | 0.252 | 0.084 |
| (0.176) | (0.059) | |
| se_social_mean | 1.205*** | 0.402*** |
| (0.195) | (0.065) | |
| sup_friends_mean | 0.049 | 0.016 |
| (0.108) | (0.036) | |
| sup_parents_mean | 0.685*** | 0.228*** |
| (0.111) | (0.037) | |
| Num.Obs. | 283 | 283 |
| R2 | 0.397 | 0.397 |
| R2 Adj. | 0.388 | 0.388 |
| AIC | 1105.3 | 483.5 |
| BIC | 1127.1 | 505.3 |
| Log.Lik. | -546.638 | -235.731 |
| RMSE | 1.67 | 0.56 |
# Coefficient plots for the parcel models, with the same p < 0.001
# significance colouring used above.
g1 = modelplot(mod_sum_parcel, coef_omit = 'Intercept') + aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
scale_color_manual(values = c("grey", "black"), guide = "none")
g2 = modelplot(mod_mean_parcel, coef_omit = 'Intercept') + aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
scale_color_manual(values = c("grey", "black"))
plot <- ggarrange(g1,g2, ncol=2, nrow=1);Hierarchical Models
# Mixed-effects versions of the full models: all twelve parcels as fixed
# effects plus a random intercept per region, fitted with REML.
formula_hierarchy_mean = "ls_mean ~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3 + sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + se_acad_p1 + se_acad_p2 + se_acad_p3 +
se_social_p1 + se_social_p2 + se_social_p3 + (1 | region)"
formula_hierarchy_sum = "ls_sum ~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3 + sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + se_acad_p1 + se_acad_p2 + se_acad_p3 +
se_social_p1 + se_social_p2 + se_social_p3 + (1 | region)"
hierarchical_mean_fit <- lmer(formula_hierarchy_mean, data = df, REML = TRUE)
hierarchical_sum_fit <- lmer(formula_hierarchy_sum, data = df, REML = TRUE)
# Coefficient plots for the hierarchical fits; re.form=NA restricts to
# population-level (fixed-effect) terms.
# NOTE(review): the "Removed 2 rows" warnings below presumably come from
# the two random-effect SD rows lacking p-values — confirm.
g1 = modelplot(hierarchical_mean_fit, re.form=NA) + aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
scale_color_manual(values = c("grey", "black"), guide = "none")
g2 = modelplot(hierarchical_sum_fit, re.form=NA) + aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
scale_color_manual(values = c("grey", "black"))
plot <- ggarrange(g1,g2, ncol=2, nrow=1);Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).
Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).
# Regression table for the two hierarchical models; rows 2:25 (the
# coefficient block) are shaded via style_tt().
modelsummary(list("hierarchical_mean_fit"= hierarchical_mean_fit,
"hierarchical_sum_fit"= hierarchical_sum_fit),
stars = TRUE) |>
style_tt(
i = 2:25,
j = 1:1,
background = "#17C2AD",
color = "white",
italic = TRUE)| hierarchical_mean_fit | hierarchical_sum_fit | |
|---|---|---|
| + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 | ||
| (Intercept) | 0.903* | 2.710* |
| (0.385) | (1.154) | |
| sup_parents_p1 | -0.007 | -0.021 |
| (0.053) | (0.160) | |
| sup_parents_p2 | 0.070 | 0.210 |
| (0.053) | (0.159) | |
| sup_parents_p3 | 0.160*** | 0.480*** |
| (0.046) | (0.139) | |
| sup_friends_p1 | -0.091+ | -0.274+ |
| (0.055) | (0.166) | |
| sup_friends_p2 | 0.125* | 0.376* |
| (0.059) | (0.177) | |
| sup_friends_p3 | 0.024 | 0.072 |
| (0.048) | (0.144) | |
| se_acad_p1 | -0.041 | -0.122 |
| (0.071) | (0.213) | |
| se_acad_p2 | 0.131+ | 0.393+ |
| (0.074) | (0.223) | |
| se_acad_p3 | 0.039 | 0.116 |
| (0.067) | (0.202) | |
| se_social_p1 | 0.094 | 0.283 |
| (0.075) | (0.224) | |
| se_social_p2 | 0.191* | 0.574* |
| (0.081) | (0.244) | |
| se_social_p3 | 0.130* | 0.389* |
| (0.059) | (0.178) | |
| SD (Intercept region) | 0.160 | 0.481 |
| SD (Observations) | 0.549 | 1.648 |
| Num.Obs. | 283 | 283 |
| R2 Marg. | 0.424 | 0.424 |
| R2 Cond. | 0.469 | 0.469 |
| AIC | 538.4 | 1131.6 |
| BIC | 593.1 | 1186.3 |
| ICC | 0.1 | 0.1 |
| RMSE | 0.54 | 1.61 |
Marginal Effects
# Predictions over counterfactual grids: the hierarchical model varies
# sup_parents_p3 by region (re.form=NA: fixed effects only); the OLS
# model varies sup_parents_p2 with other covariates at datagrid defaults.
pred <- predictions(hierarchical_mean_fit, newdata = datagrid(sup_parents_p3 = 1:10, region=c('east', 'west')), re.form=NA)
pred1 <- predictions(mod_mean, newdata = datagrid(sup_parents_p2 = 1:10))
pred1 |> head() |> kableExtra::kable()| rowid | estimate | std.error | statistic | p.value | s.value | conf.low | conf.high | se_acad_p1 | se_acad_p2 | se_acad_p3 | se_social_p1 | se_social_p2 | se_social_p3 | sup_friends_p1 | sup_friends_p2 | sup_friends_p3 | sup_parents_p1 | sup_parents_p3 | sup_parents_p2 | ls_mean |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 5.232276 | 0.2673967 | 19.56747 | 0 | 280.8136 | 4.708188 | 5.756363 | 5.154955 | 5.346853 | 5.211248 | 5.290047 | 5.477267 | 5.440989 | 5.786219 | 6.010601 | 5.991166 | 5.975265 | 5.719081 | 1 | 6.576815 |
| 2 | 5.287553 | 0.2140733 | 24.69973 | 0 | 445.0319 | 4.867977 | 5.707128 | 5.154955 | 5.346853 | 5.211248 | 5.290047 | 5.477267 | 5.440989 | 5.786219 | 6.010601 | 5.991166 | 5.975265 | 5.719081 | 2 | 6.576815 |
| 3 | 5.342829 | 0.1610972 | 33.16525 | 0 | 798.8130 | 5.027085 | 5.658574 | 5.154955 | 5.346853 | 5.211248 | 5.290047 | 5.477267 | 5.440989 | 5.786219 | 6.010601 | 5.991166 | 5.975265 | 5.719081 | 3 | 6.576815 |
| 4 | 5.398106 | 0.1089765 | 49.53461 | 0 | Inf | 5.184516 | 5.611696 | 5.154955 | 5.346853 | 5.211248 | 5.290047 | 5.477267 | 5.440989 | 5.786219 | 6.010601 | 5.991166 | 5.975265 | 5.719081 | 4 | 6.576815 |
| 5 | 5.453383 | 0.0599835 | 90.91472 | 0 | Inf | 5.335818 | 5.570949 | 5.154955 | 5.346853 | 5.211248 | 5.290047 | 5.477267 | 5.440989 | 5.786219 | 6.010601 | 5.991166 | 5.975265 | 5.719081 | 5 | 6.576815 |
| 6 | 5.508660 | 0.0334480 | 164.69327 | 0 | Inf | 5.443103 | 5.574217 | 5.154955 | 5.346853 | 5.211248 | 5.290047 | 5.477267 | 5.440989 | 5.786219 | 6.010601 | 5.991166 | 5.975265 | 5.719081 | 6 | 6.576815 |
Regression Marginal Effects
# Counterfactual prediction plots from the OLS model: vary one predictor
# at a time while the others are held fixed.
g = plot_predictions(mod_mean, condition = c("sup_parents_p3"), type = "response") + ggtitle("Counterfactual Shift of Outcome: sup_parents_p3", "Holding all else Fixed")
g1 = plot_predictions(mod_mean, condition = c("sup_friends_p1"), type = "response") + ggtitle("Counterfactual Shift of Outcome: sup_friends_p1", "Holding all else Fixed")
g2 = plot_predictions(mod_mean, condition = c("se_acad_p1"),
type = "response") + ggtitle("Counterfactual Shift of Outcome: se_acad_p1", "Holding all else Fixed")
plot <- ggarrange(g,g1,g2, ncol=1, nrow=3);Hierarchical Marginal Effects
# The same counterfactual plots from the hierarchical model, additionally
# conditioned on region; re.form=NA keeps predictions at the fixed-effect
# (population) level.
g = plot_predictions(hierarchical_mean_fit, condition = c("sup_parents_p3", "region"), type = "response", re.form=NA) + ggtitle("Counterfactual Shift of Outcome: sup_parents_p3", "Holding all else Fixed")
g1 = plot_predictions(hierarchical_mean_fit, condition = c("sup_friends_p1", "region"), type = "response", re.form=NA) + ggtitle("Counterfactual Shift of Outcome: sup_friends_p1", "Holding all else Fixed")
g2 = plot_predictions(hierarchical_mean_fit, condition = c("se_acad_p1", "region"),
type = "response", re.form=NA) + ggtitle("Counterfactual Shift of Outcome: se_acad_p1", "Holding all else Fixed")
plot <- ggarrange(g,g1,g2, ncol=1, nrow=3);Confirmatory Factor Analysis
# CFA measurement model (lavaan syntax inside the string): five latent
# constructs, each loading on its three observed parcels.
model_measurement <- "
# Measurement model
SUP_Parents =~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3
SUP_Friends =~ sup_friends_p1 + sup_friends_p2 + sup_friends_p3
SE_Academic =~ se_acad_p1 + se_acad_p2 + se_acad_p3
SE_Social =~ se_social_p1 + se_social_p2 + se_social_p3
LS =~ ls_p1 + ls_p2 + ls_p3
"
# Constrained variant: the three friend-support loadings (a1, a2, a3) are
# forced equal via the a1 == a2 and a1 == a3 constraints.
model_measurement1 <- "
# Measurement model
SUP_Parents =~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3
SUP_Friends =~ a1*sup_friends_p1 + a2*sup_friends_p2 + a3*sup_friends_p3
SE_Academic =~ se_acad_p1 + se_acad_p2 + se_acad_p3
SE_Social =~ se_social_p1 + se_social_p2 + se_social_p3
LS =~ ls_p1 + ls_p2 + ls_p3
a1 == a2
a1 == a3
"
# Fit both measurement models and collect them for comparison plots.
fit_mod <- cfa(model_measurement, data = df)
fit_mod_1<- cfa(model_measurement1, data = df)
cfa_models = list("full_measurement_model" = fit_mod,
"measurement_model_reduced" = fit_mod_1)
modelplot(cfa_models)summary(fit_mod, fit.measures = TRUE, standardized = TRUE) lavaan 0.6-18 ended normally after 46 iterations
Estimator ML
Optimization method NLMINB
Number of model parameters 40
Number of observations 283
Model Test User Model:
Test statistic 207.137
Degrees of freedom 80
P-value (Chi-square) 0.000
Model Test Baseline Model:
Test statistic 2586.651
Degrees of freedom 105
P-value 0.000
User Model versus Baseline Model:
Comparative Fit Index (CFI) 0.949
Tucker-Lewis Index (TLI) 0.933
Loglikelihood and Information Criteria:
Loglikelihood user model (H0) -4394.212
Loglikelihood unrestricted model (H1) -4290.643
Akaike (AIC) 8868.423
Bayesian (BIC) 9014.241
Sample-size adjusted Bayesian (SABIC) 8887.401
Root Mean Square Error of Approximation:
RMSEA 0.075
90 Percent confidence interval - lower 0.062
90 Percent confidence interval - upper 0.088
P-value H_0: RMSEA <= 0.050 0.001
P-value H_0: RMSEA >= 0.080 0.263
Standardized Root Mean Square Residual:
SRMR 0.056
Parameter Estimates:
Standard errors Standard
Information Expected
Information saturated (h1) model Structured
Latent Variables:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
SUP_Parents =~
sup_parents_p1 1.000 0.938 0.876
sup_parents_p2 1.034 0.056 18.570 0.000 0.970 0.888
sup_parents_p3 0.988 0.059 16.637 0.000 0.927 0.811
SUP_Friends =~
sup_friends_p1 1.000 1.021 0.906
sup_friends_p2 0.793 0.043 18.466 0.000 0.809 0.857
sup_friends_p3 0.891 0.050 17.735 0.000 0.910 0.831
SE_Academic =~
se_acad_p1 1.000 0.697 0.880
se_acad_p2 0.806 0.049 16.278 0.000 0.561 0.819
se_acad_p3 0.952 0.058 16.491 0.000 0.663 0.828
SE_Social =~
se_social_p1 1.000 0.640 0.846
se_social_p2 0.959 0.055 17.338 0.000 0.614 0.881
se_social_p3 0.926 0.066 13.956 0.000 0.593 0.742
LS =~
ls_p1 1.000 0.677 0.729
ls_p2 0.820 0.078 10.488 0.000 0.555 0.762
ls_p3 0.744 0.109 6.809 0.000 0.504 0.459
Covariances:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
SUP_Parents ~~
SUP_Friends 0.134 0.064 2.094 0.036 0.140 0.140
SE_Academic 0.219 0.046 4.717 0.000 0.335 0.335
SE_Social 0.284 0.046 6.239 0.000 0.473 0.473
LS 0.360 0.056 6.455 0.000 0.567 0.567
SUP_Friends ~~
SE_Academic 0.071 0.047 1.493 0.135 0.100 0.100
SE_Social 0.195 0.046 4.262 0.000 0.299 0.299
LS 0.184 0.053 3.500 0.000 0.267 0.267
SE_Academic ~~
SE_Social 0.274 0.036 7.525 0.000 0.613 0.613
LS 0.212 0.039 5.407 0.000 0.449 0.449
SE_Social ~~
LS 0.330 0.043 7.650 0.000 0.761 0.761
Variances:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
.sup_parents_p1 0.268 0.037 7.143 0.000 0.268 0.233
.sup_parents_p2 0.253 0.038 6.595 0.000 0.253 0.212
.sup_parents_p3 0.446 0.048 9.262 0.000 0.446 0.342
.sup_friends_p1 0.228 0.040 5.663 0.000 0.228 0.179
.sup_friends_p2 0.237 0.030 7.926 0.000 0.237 0.266
.sup_friends_p3 0.371 0.042 8.811 0.000 0.371 0.310
.se_acad_p1 0.141 0.022 6.487 0.000 0.141 0.226
.se_acad_p2 0.154 0.018 8.648 0.000 0.154 0.329
.se_acad_p3 0.202 0.024 8.397 0.000 0.202 0.315
.se_social_p1 0.163 0.020 8.095 0.000 0.163 0.284
.se_social_p2 0.109 0.016 6.782 0.000 0.109 0.224
.se_social_p3 0.287 0.028 10.141 0.000 0.287 0.450
.ls_p1 0.404 0.048 8.422 0.000 0.404 0.469
.ls_p2 0.223 0.029 7.624 0.000 0.223 0.420
.ls_p3 0.951 0.085 11.123 0.000 0.951 0.789
SUP_Parents 0.880 0.098 8.937 0.000 1.000 1.000
SUP_Friends 1.042 0.111 9.405 0.000 1.000 1.000
SE_Academic 0.485 0.054 8.908 0.000 1.000 1.000
SE_Social 0.410 0.048 8.459 0.000 1.000 1.000
LS 0.458 0.072 6.323 0.000 1.000 1.000
# Compare the observed sample covariances with the CFA model-implied
# covariances, stacked as two heatmaps.
g1 = plot_heatmap(cov(df[, drivers]))
g2 = plot_heatmap(data.frame(fitted(fit_mod)$cov), title="Model Implied Covariances", "Fitted Values")
plot <- ggarrange(g1,g2, ncol=1, nrow=2);Modification Indices
# Top-6 modification indices (mi >= 5): candidate extra parameters that
# would most improve model fit if freed.
modindices(fit_mod, sort = TRUE, maximum.number = 6, standardized = FALSE, minimum.value = 5) lhs op rhs mi epc
152 sup_friends_p1 ~~ se_social_p3 19.245 0.095
68 SUP_Friends =~ ls_p2 17.491 0.181
64 SUP_Friends =~ se_social_p1 17.210 -0.136
60 SUP_Friends =~ sup_parents_p3 16.063 -0.187
210 ls_p2 ~~ ls_p3 13.931 -0.140
57 SUP_Parents =~ ls_p3 13.057 0.329
Summary Global Fit Measures
# Side-by-side global fit measures for the full and constrained CFA models.
summary_df = cbind(fitMeasures(fit_mod, c("chisq", "baseline.chisq", "cfi", "aic", "bic", "rmsea","srmr")),
fitMeasures(fit_mod_1, c("chisq", "baseline.chisq", "cfi", "aic", "bic", "rmsea","srmr")))
colnames(summary_df) = c('Full Model', 'Reduced Model')
summary_df Full Model Reduced Model
chisq 207.13746909 225.18095274
baseline.chisq 2586.65112811 2586.65112811
cfi 0.94876900 0.94230416
aic 8868.42313715 8882.46662079
bic 9014.24101305 9020.99360290
rmsea 0.07493739 0.07854933
srmr 0.05595908 0.05904842
# Path diagrams: raw estimates ('est') for the full model, standardized
# loadings ('std') for the constrained model.
semPlot::semPaths(fit_mod, whatLabels = 'est', intercepts = FALSE)semPlot::semPaths(fit_mod_1, whatLabels = 'std', intercepts = FALSE)Comparing the empirical and implied variance-covariance matrix
# Standardized residuals between the sample and model-implied covariance
# matrices, for the full and reduced measurement models.
# Uses the shared plot_heatmap() helper defined earlier instead of
# duplicating the melt/ggplot pipeline twice inline. (The intermediate
# global `heat_df` from the original code is dropped; it is reassigned
# later before any subsequent use.)
g1 <- plot_heatmap(
  data.frame(resid(fit_mod, type = "standardized")$cov),
  title = "Residuals of the Sample Covariances and Model Implied Covariances",
  subtitle = "A Visual Check of Model fit: Full Model"
)
g2 <- plot_heatmap(
  data.frame(resid(fit_mod_1, type = "standardized")$cov),
  title = "Residuals of the Sample Covariances and Model Implied Covariances",
  subtitle = "A Visual Check of Model fit: Reduced Model"
)
plot <- ggarrange(g1,g2, ncol=1, nrow=2);anova(fit_mod)Chi-Squared Test Statistic (unscaled)
Df AIC BIC Chisq Chisq diff Df diff Pr(>Chisq)
Saturated 0 0.00
Model 80 8868.4 9014.2 207.14 207.14 80 0.0000000000003209 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(fit_mod_1)Chi-Squared Test Statistic (unscaled)
Df AIC BIC Chisq Chisq diff Df diff Pr(>Chisq)
Saturated 0 0.00
Model 82 8882.5 9021 225.18 225.18 82 0.000000000000002776 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Likelihood-ratio (chi-squared difference) test between the full and
# constrained measurement models.
anova(fit_mod, fit_mod_1)
Chi-Squared Difference Test
Df AIC BIC Chisq Chisq diff RMSEA Df diff Pr(>Chisq)
fit_mod 80 8868.4 9014.2 207.14
fit_mod_1 82 8882.5 9021.0 225.18 18.044 0.16836 2 0.0001208 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Structural Equation Models
# Full SEM: the CFA measurement model plus a structural regression of
# life satisfaction (LS) on the four predictor constructs, with a
# residual covariance between the two self-efficacy factors.
model <- "
# Measurement model
SUP_Parents =~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3
SUP_Friends =~ sup_friends_p1 + sup_friends_p2 + sup_friends_p3
SE_Academic =~ se_acad_p1 + se_acad_p2 + se_acad_p3
SE_Social =~ se_social_p1 + se_social_p2 + se_social_p3
LS =~ ls_p1 + ls_p2 + ls_p3
# Structural model
# Regressions
LS ~ SE_Academic + SE_Social + SUP_Parents + SUP_Friends
# Residual covariances
SE_Academic ~~ SE_Social
"
# Fit the SEM, then inspect it: coefficient plot, standardized path
# diagram, and a heatmap of the standardized residual covariances
# (built inline rather than via the plot_heatmap helper).
fit_mod_sem <- sem(model, data = df)modelplot(fit_mod_sem)semPlot::semPaths(fit_mod_sem, whatLabels = 'std', intercepts = FALSE)heat_df = data.frame(resid(fit_mod_sem, type = "standardized")$cov)
heat_df = heat_df |> as.matrix() |> melt()
colnames(heat_df) <- c("x", "y", "value")
heat_df |> mutate(value = round(value, 2)) |> ggplot(aes(x = x, y = y, fill = value)) +
geom_tile() + geom_text(aes(label = value), color = "black", size = 4) +
scale_fill_gradient2(
high = 'dodgerblue4',
mid = 'white',
low = 'firebrick2'
) + theme(axis.text.x = element_text(angle=45)) + ggtitle("Residuals of the Sample Covariances and Model Implied Covariances", "A Visual Check of Model fit")Confirmatory Factor Models with PyMC
# Python-side dependencies for the Bayesian CFA models.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pytensor import tensor as pt
import arviz as az
import networkx as nx

# Whitespace-delimited indicator data (5 columns: PI, AD, IGC, FI, FC).
# Raw string for the separator: a plain '\s+' is an invalid escape
# sequence and raises a SyntaxWarning on modern Python.
df_p = pd.read_csv('IIS.dat', sep=r'\s+')
df_p.head()
     PI    AD   IGC   FI   FC
0 4.00 3.38 4.67 2.6 3.2
1 2.57 3.00 3.50 2.4 2.8
2 2.29 3.29 4.83 2.0 3.4
3 2.43 3.63 4.33 3.6 3.8
4 3.00 4.00 4.83 3.4 3.8
# Named dimensions for the two-factor CFA: three indicators load on
# 'Student' (PI, AD, IGC) and two on 'Faculty' (FI, FC).
n_obs = len(df_p)
coords = {
    'obs': list(range(n_obs)),
    'indicators': ['PI', 'AD', 'IGC', 'FI', 'FC'],
    'indicators_1': ['PI', 'AD', 'IGC'],
    'indicators_2': ['FI', 'FC'],
    'latent': ['Student', 'Faculty'],
}
# Row positions used to index into the latent-score matrix.
obs_idx = list(range(n_obs))
# Two-factor confirmatory factor model. NOTE(review): block indentation
# was lost in extraction; restored here so the `with` context is valid.
with pm.Model(coords=coords) as model:
    # Indicator-specific residual scales (passed as sigma below).
    Psi = pm.InverseGamma('Psi', 5, 10, dims='indicators')

    # Factor loadings; the first loading of each factor is pinned to 1
    # via set_subtensor to fix the latent scale (CFA identification).
    lambdas_ = pm.Normal('lambdas_1', 1, 10, dims=('indicators_1'))
    lambdas_1 = pm.Deterministic('lambdas1', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_1'))
    lambdas_ = pm.Normal('lambdas_2', 1, 10, dims=('indicators_2'))
    lambdas_2 = pm.Deterministic('lambdas2', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_2'))

    # Indicator intercepts.
    tau = pm.Normal('tau', 3, 10, dims='indicators')

    # Zero-centred latent scores with an LKJ prior on the 2x2 factor
    # covariance (Cholesky parameterisation).
    kappa = 0
    sd_dist = pm.Exponential.dist(1.0, shape=2)
    chol, _, _ = pm.LKJCholeskyCov('chol_cov', n=2, eta=2,
                                   sd_dist=sd_dist, compute_corr=True)
    ksi = pm.MvNormal('ksi', kappa, chol=chol, dims=('obs', 'latent'))

    # Measurement equations: mu_ij = tau_j + ksi_i * lambda_j.
    # Indicators 1-3 load on the Student factor, 4-5 on Faculty.
    m1 = tau[0] + ksi[obs_idx, 0]*lambdas_1[0]
    m2 = tau[1] + ksi[obs_idx, 0]*lambdas_1[1]
    m3 = tau[2] + ksi[obs_idx, 0]*lambdas_1[2]
    m4 = tau[3] + ksi[obs_idx, 1]*lambdas_2[0]
    m5 = tau[4] + ksi[obs_idx, 1]*lambdas_2[1]
    mu = pm.Deterministic('mu', pm.math.stack([m1, m2, m3, m4, m5]).T)

    _ = pm.Normal('likelihood', mu, Psi, observed=df_p.values)
    idata = pm.sample(nuts_sampler='numpyro', target_accept=.95)
summary_df = az.summary(idata, var_names=['lambdas1', 'lambdas2', 'tau', 'Psi', 'ksi', 'chol_cov_corr'], coords= {'obs': [0, 7]})
py$summary_df |> kable()
| mean | sd | hdi_3% | hdi_97% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
|---|---|---|---|---|---|---|---|---|---|
| lambdas1[PI] | 1.000 | 0.000 | 1.000 | 1.000 | 0.000 | 0.000 | 4000 | 4000 | NaN |
| lambdas1[AD] | 0.903 | 0.066 | 0.786 | 1.029 | 0.004 | 0.003 | 340 | 524 | 1.01 |
| lambdas1[IGC] | 0.538 | 0.046 | 0.454 | 0.628 | 0.002 | 0.002 | 473 | 741 | 1.00 |
| lambdas2[FI] | 1.000 | 0.000 | 1.000 | 1.000 | 0.000 | 0.000 | 4000 | 4000 | NaN |
| lambdas2[FC] | 0.979 | 0.056 | 0.871 | 1.082 | 0.003 | 0.002 | 435 | 724 | 1.01 |
| tau[PI] | 3.335 | 0.036 | 3.269 | 3.403 | 0.001 | 0.001 | 631 | 1262 | 1.00 |
| tau[AD] | 3.900 | 0.025 | 3.853 | 3.950 | 0.001 | 0.001 | 423 | 941 | 1.00 |
| tau[IGC] | 4.597 | 0.021 | 4.558 | 4.636 | 0.001 | 0.001 | 702 | 1364 | 1.00 |
| tau[FI] | 3.037 | 0.039 | 2.967 | 3.112 | 0.002 | 0.001 | 474 | 1168 | 1.00 |
| tau[FC] | 3.716 | 0.034 | 3.652 | 3.782 | 0.002 | 0.001 | 389 | 842 | 1.00 |
| Psi[PI] | 0.610 | 0.024 | 0.564 | 0.655 | 0.001 | 0.000 | 1370 | 2631 | 1.00 |
| Psi[AD] | 0.317 | 0.020 | 0.281 | 0.353 | 0.001 | 0.001 | 679 | 1551 | 1.00 |
| Psi[IGC] | 0.355 | 0.013 | 0.332 | 0.380 | 0.000 | 0.000 | 2467 | 3095 | 1.00 |
| Psi[FI] | 0.570 | 0.025 | 0.521 | 0.616 | 0.001 | 0.001 | 1143 | 1769 | 1.00 |
| Psi[FC] | 0.421 | 0.026 | 0.375 | 0.471 | 0.001 | 0.001 | 804 | 1246 | 1.01 |
| ksi[0, Student] | -0.226 | 0.225 | -0.649 | 0.207 | 0.004 | 0.003 | 3803 | 3018 | 1.00 |
| ksi[0, Faculty] | -0.369 | 0.272 | -0.859 | 0.144 | 0.004 | 0.003 | 4030 | 2898 | 1.00 |
| ksi[7, Student] | 0.876 | 0.228 | 0.467 | 1.313 | 0.004 | 0.003 | 2576 | 2864 | 1.00 |
| ksi[7, Faculty] | 0.864 | 0.272 | 0.346 | 1.354 | 0.004 | 0.003 | 4126 | 3083 | 1.00 |
| chol_cov_corr[0, 0] | 1.000 | 0.000 | 1.000 | 1.000 | 0.000 | 0.000 | 4000 | 4000 | NaN |
| chol_cov_corr[0, 1] | 0.852 | 0.028 | 0.801 | 0.905 | 0.001 | 0.001 | 474 | 493 | 1.01 |
| chol_cov_corr[1, 0] | 0.852 | 0.028 | 0.801 | 0.905 | 0.001 | 0.001 | 474 | 493 | 1.01 |
| chol_cov_corr[1, 1] | 1.000 | 0.000 | 1.000 | 1.000 | 0.000 | 0.000 | 3890 | 4000 | 1.00 |
az.plot_trace(idata, var_names=['lambdas1', 'lambdas2', 'tau', 'Psi', 'ksi']);
/Users/nathanielforde/mambaforge/envs/pymc_causal/lib/python3.11/site-packages/arviz/stats/density_utils.py:487: UserWarning: Your data appears to have a single value or no finite values
warnings.warn("Your data appears to have a single value or no finite values")
/Users/nathanielforde/mambaforge/envs/pymc_causal/lib/python3.11/site-packages/arviz/stats/density_utils.py:487: UserWarning: Your data appears to have a single value or no finite values
warnings.warn("Your data appears to have a single value or no finite values")
# Reload the survey data on the Python side and declare the named
# dimensions for the five-factor CFA (three indicators per factor).
df = pd.read_csv('sem_data.csv')

drivers = [
    'se_acad_p1', 'se_acad_p2', 'se_acad_p3',
    'se_social_p1', 'se_social_p2', 'se_social_p3',
    'sup_friends_p1', 'sup_friends_p2', 'sup_friends_p3',
    'sup_parents_p1', 'sup_parents_p2', 'sup_parents_p3',
    'ls_p1', 'ls_p2', 'ls_p3',
]

# 'latent' and 'latent1' carry the same labels so the factor
# covariance matrix can be given two named dimensions.
coords = {
    'obs': list(range(len(df))),
    'indicators': drivers,
    'indicators_1': ['se_acad_p1', 'se_acad_p2', 'se_acad_p3'],
    'indicators_2': ['se_social_p1', 'se_social_p2', 'se_social_p3'],
    'indicators_3': ['sup_friends_p1', 'sup_friends_p2', 'sup_friends_p3'],
    'indicators_4': ['sup_parents_p1', 'sup_parents_p2', 'sup_parents_p3'],
    'indicators_5': ['ls_p1', 'ls_p2', 'ls_p3'],
    'latent': ['SE_ACAD', 'SE_SOCIAL', 'SUP_F', 'SUP_P', 'LS'],
    'latent1': ['SE_ACAD', 'SE_SOCIAL', 'SUP_F', 'SUP_P', 'LS'],
}
obs_idx = list(range(len(df)))
# Five-factor confirmatory factor model. NOTE(review): block indentation
# was lost in extraction; restored here so the `with` context is valid.
with pm.Model(coords=coords) as model:
    # Indicator-specific residual scales (passed as sigma below).
    Psi = pm.InverseGamma('Psi', 5, 10, dims='indicators')

    # Loadings for the five factors; the first loading of each factor is
    # pinned to 1 via set_subtensor to fix the latent scale.
    lambdas_ = pm.Normal('lambdas_1', 1, 10, dims=('indicators_1'))
    lambdas_1 = pm.Deterministic('lambdas1', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_1'))
    lambdas_ = pm.Normal('lambdas_2', 1, 10, dims=('indicators_2'))
    lambdas_2 = pm.Deterministic('lambdas2', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_2'))
    lambdas_ = pm.Normal('lambdas_3', 1, 10, dims=('indicators_3'))
    lambdas_3 = pm.Deterministic('lambdas3', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_3'))
    lambdas_ = pm.Normal('lambdas_4', 1, 10, dims=('indicators_4'))
    lambdas_4 = pm.Deterministic('lambdas4', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_4'))
    lambdas_ = pm.Normal('lambdas_5', 1, 10, dims=('indicators_5'))
    lambdas_5 = pm.Deterministic('lambdas5', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_5'))

    # Indicator intercepts.
    tau = pm.Normal('tau', 3, 10, dims='indicators')

    # Zero-centred latent scores with an LKJ prior on the 5x5 factor
    # covariance; 'cov' is stored for the reporting step below.
    kappa = 0
    sd_dist = pm.Exponential.dist(1.0, shape=5)
    chol, _, _ = pm.LKJCholeskyCov('chol_cov', n=5, eta=2,
                                   sd_dist=sd_dist, compute_corr=True)
    cov = pm.Deterministic("cov", chol.dot(chol.T), dims=('latent', 'latent1'))
    ksi = pm.MvNormal('ksi', kappa, chol=chol, dims=('obs', 'latent'))

    # Measurement equations mu_ij = tau_j + ksi_i,f * lambda_f,k:
    # each factor f has exactly three indicators, so indicator j
    # belongs to factor j // 3 and is its (j % 3)-th loading.
    loadings = [lambdas_1, lambdas_2, lambdas_3, lambdas_4, lambdas_5]
    ms = [tau[j] + ksi[obs_idx, j // 3] * loadings[j // 3][j % 3]
          for j in range(15)]
    mu = pm.Deterministic('mu', pm.math.stack(ms).T)

    _ = pm.Normal('likelihood', mu, Psi, observed=df[drivers].values)
    idata = pm.sample(nuts_sampler='numpyro', target_accept=.95)
summary_df1 = az.summary(idata, var_names=['lambdas1', 'lambdas2', 'lambdas3', 'lambdas4', 'lambdas5', 'tau', 'Psi'])
cov_df = pd.DataFrame(az.extract(idata['posterior'])['cov'].mean(axis=2))
cov_df.index = ['SE_ACAD', 'SE_SOCIAL', 'SUP_F', 'SUP_P', 'LS']
cov_df.columns = ['SE_ACAD', 'SE_SOCIAL', 'SUP_F', 'SUP_P', 'LS']
correlation_df = pd.DataFrame(az.extract(idata['posterior'])['chol_cov_corr'].mean(axis=2))
correlation_df.index = ['SE_ACAD', 'SE_SOCIAL', 'SUP_F', 'SUP_P', 'LS']
correlation_df.columns = ['SE_ACAD', 'SE_SOCIAL', 'SUP_F', 'SUP_P', 'LS']py$summary_df1 |> kable()| mean | sd | hdi_3% | hdi_97% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
|---|---|---|---|---|---|---|---|---|---|
| lambdas1[se_acad_p1] | 1.000 | 0.000 | 1.000 | 1.000 | 0.000 | 0.000 | 4000 | 4000 | NaN |
| lambdas1[se_acad_p2] | 0.818 | 0.054 | 0.712 | 0.914 | 0.002 | 0.001 | 903 | 1627 | 1.01 |
| lambdas1[se_acad_p3] | 0.969 | 0.061 | 0.854 | 1.081 | 0.002 | 0.001 | 925 | 1255 | 1.01 |
| lambdas2[se_social_p1] | 1.000 | 0.000 | 1.000 | 1.000 | 0.000 | 0.000 | 4000 | 4000 | NaN |
| lambdas2[se_social_p2] | 0.960 | 0.059 | 0.852 | 1.074 | 0.002 | 0.002 | 725 | 1398 | 1.00 |
| lambdas2[se_social_p3] | 0.937 | 0.073 | 0.796 | 1.072 | 0.002 | 0.002 | 1030 | 2140 | 1.00 |
| lambdas3[sup_friends_p1] | 1.000 | 0.000 | 1.000 | 1.000 | 0.000 | 0.000 | 4000 | 4000 | NaN |
| lambdas3[sup_friends_p2] | 0.801 | 0.045 | 0.716 | 0.886 | 0.001 | 0.001 | 1107 | 1774 | 1.00 |
| lambdas3[sup_friends_p3] | 0.904 | 0.052 | 0.800 | 1.000 | 0.001 | 0.001 | 1234 | 2111 | 1.00 |
| lambdas4[sup_parents_p1] | 1.000 | 0.000 | 1.000 | 1.000 | 0.000 | 0.000 | 4000 | 4000 | NaN |
| lambdas4[sup_parents_p2] | 1.039 | 0.058 | 0.932 | 1.145 | 0.002 | 0.001 | 907 | 1546 | 1.00 |
| lambdas4[sup_parents_p3] | 1.007 | 0.063 | 0.894 | 1.127 | 0.002 | 0.001 | 1063 | 1654 | 1.00 |
| lambdas5[ls_p1] | 1.000 | 0.000 | 1.000 | 1.000 | 0.000 | 0.000 | 4000 | 4000 | NaN |
| lambdas5[ls_p2] | 0.790 | 0.081 | 0.638 | 0.940 | 0.003 | 0.002 | 551 | 1113 | 1.01 |
| lambdas5[ls_p3] | 0.990 | 0.099 | 0.801 | 1.171 | 0.004 | 0.003 | 519 | 1369 | 1.01 |
| tau[se_acad_p1] | 5.155 | 0.049 | 5.064 | 5.245 | 0.003 | 0.002 | 361 | 788 | 1.01 |
| tau[se_acad_p2] | 5.347 | 0.042 | 5.264 | 5.422 | 0.002 | 0.002 | 389 | 921 | 1.01 |
| tau[se_acad_p3] | 5.211 | 0.049 | 5.119 | 5.303 | 0.002 | 0.002 | 393 | 914 | 1.01 |
| tau[se_social_p1] | 5.291 | 0.045 | 5.209 | 5.379 | 0.002 | 0.002 | 341 | 611 | 1.01 |
| tau[se_social_p2] | 5.479 | 0.041 | 5.401 | 5.556 | 0.002 | 0.002 | 321 | 648 | 1.01 |
| tau[se_social_p3] | 5.442 | 0.049 | 5.352 | 5.534 | 0.002 | 0.002 | 450 | 849 | 1.00 |
| tau[sup_friends_p1] | 5.786 | 0.066 | 5.659 | 5.902 | 0.003 | 0.002 | 372 | 777 | 1.01 |
| tau[sup_friends_p2] | 6.010 | 0.055 | 5.913 | 6.115 | 0.003 | 0.002 | 391 | 942 | 1.01 |
| tau[sup_friends_p3] | 5.991 | 0.065 | 5.870 | 6.114 | 0.003 | 0.002 | 415 | 1011 | 1.01 |
| tau[sup_parents_p1] | 5.975 | 0.063 | 5.856 | 6.090 | 0.003 | 0.002 | 395 | 926 | 1.00 |
| tau[sup_parents_p2] | 5.926 | 0.064 | 5.811 | 6.054 | 0.003 | 0.002 | 386 | 855 | 1.00 |
| tau[sup_parents_p3] | 5.719 | 0.068 | 5.595 | 5.852 | 0.003 | 0.002 | 446 | 921 | 1.00 |
| tau[ls_p1] | 5.193 | 0.056 | 5.094 | 5.304 | 0.002 | 0.002 | 505 | 1251 | 1.00 |
| tau[ls_p2] | 5.778 | 0.044 | 5.699 | 5.860 | 0.002 | 0.001 | 541 | 1289 | 1.00 |
| tau[ls_p3] | 5.223 | 0.052 | 5.122 | 5.319 | 0.002 | 0.002 | 469 | 761 | 1.00 |
| Psi[se_acad_p1] | 0.412 | 0.028 | 0.364 | 0.467 | 0.001 | 0.001 | 1271 | 2427 | 1.00 |
| Psi[se_acad_p2] | 0.413 | 0.023 | 0.367 | 0.455 | 0.000 | 0.000 | 2571 | 2655 | 1.00 |
| Psi[se_acad_p3] | 0.467 | 0.028 | 0.419 | 0.522 | 0.001 | 0.000 | 1976 | 2777 | 1.00 |
| Psi[se_social_p1] | 0.430 | 0.026 | 0.381 | 0.477 | 0.001 | 0.000 | 1511 | 1986 | 1.00 |
| Psi[se_social_p2] | 0.362 | 0.025 | 0.315 | 0.407 | 0.001 | 0.000 | 1211 | 1534 | 1.00 |
| Psi[se_social_p3] | 0.554 | 0.028 | 0.504 | 0.610 | 0.001 | 0.000 | 2475 | 2428 | 1.00 |
| Psi[sup_friends_p1] | 0.514 | 0.040 | 0.440 | 0.590 | 0.001 | 0.001 | 873 | 1159 | 1.00 |
| Psi[sup_friends_p2] | 0.508 | 0.031 | 0.447 | 0.562 | 0.001 | 0.001 | 1508 | 2500 | 1.00 |
| Psi[sup_friends_p3] | 0.625 | 0.036 | 0.564 | 0.697 | 0.001 | 0.001 | 2056 | 2894 | 1.00 |
| Psi[sup_parents_p1] | 0.551 | 0.035 | 0.488 | 0.619 | 0.001 | 0.001 | 1442 | 2421 | 1.00 |
| Psi[sup_parents_p2] | 0.535 | 0.037 | 0.466 | 0.603 | 0.001 | 0.001 | 1418 | 2420 | 1.00 |
| Psi[sup_parents_p3] | 0.675 | 0.039 | 0.603 | 0.749 | 0.001 | 0.001 | 1921 | 2311 | 1.00 |
| Psi[ls_p1] | 0.671 | 0.037 | 0.600 | 0.742 | 0.001 | 0.001 | 1168 | 2521 | 1.00 |
| Psi[ls_p2] | 0.533 | 0.031 | 0.474 | 0.590 | 0.001 | 0.001 | 1471 | 2421 | 1.00 |
| Psi[ls_p3] | 0.623 | 0.036 | 0.555 | 0.689 | 0.001 | 0.001 | 1512 | 1851 | 1.00 |
az.plot_forest(idata, var_names=['lambdas1', 'lambdas2', 'lambdas3', 'lambdas4', 'lambdas5'], combined=True);
py$cov_df |> kable()
| SE_ACAD | SE_SOCIAL | SUP_F | SUP_P | LS | |
|---|---|---|---|---|---|
| SE_ACAD | 0.4706940 | 0.2603813 | 0.0631096 | 0.2027756 | 0.2216762 |
| SE_SOCIAL | 0.2603813 | 0.3974050 | 0.1868705 | 0.2677145 | 0.3040071 |
| SUP_F | 0.0631096 | 0.1868705 | 1.0358624 | 0.1195301 | 0.1642341 |
| SUP_P | 0.2027756 | 0.2677145 | 0.1195301 | 0.8623949 | 0.3841457 |
| LS | 0.2216762 | 0.3040071 | 0.1642341 | 0.3841457 | 0.4221140 |
py$correlation_df |> kable()
| SE_ACAD | SE_SOCIAL | SUP_F | SUP_P | LS | |
|---|---|---|---|---|---|
| SE_ACAD | 1.0000000 | 0.6025576 | 0.0903843 | 0.3184307 | 0.4987059 |
| SE_SOCIAL | 0.6025576 | 1.0000000 | 0.2916484 | 0.4574083 | 0.7448812 |
| SUP_F | 0.0903843 | 0.2916484 | 1.0000000 | 0.1264455 | 0.2493548 |
| SUP_P | 0.3184307 | 0.4574083 | 0.1264455 | 1.0000000 | 0.6389961 |
| LS | 0.4987059 | 0.7448812 | 0.2493548 | 0.6389961 | 1.0000000 |
Citation
BibTeX citation:
@online{forde,
author = {Nathaniel Forde},
title = {Confirmatory {Factor} {Analysis} and {Structural} {Equation}
{Models}},
langid = {en}
}
For attribution, please cite this work as:
Nathaniel Forde. n.d. “Confirmatory Factor Analysis and Structural
Equation Models.”